0. set up

# source the cleaning/summarizing script, which sources the set up
source(here::here("code", "01_cleaning-and-summarizing.R"))

1. Biomass

a. Guiding questions

What’s the percent community biomass that can be attributed to:
1. Laminariales, and
2. the 11 abundant species from Miller et al.?

Data question: what is the difference between the percent cover data and the percent cover that’s in the column in the biomass data?

Stray thoughts: introduced/native species trait column?

b. Test case: SCTW from 3 different sampling points

To make sure the code was working, I tested this out with “SCTW_2010-07-29”, “SCTW_2016-07-20”, and “SCTW_2020-07-29”. The issue comes from scenarios where species were counted at more than one transect - for example, at SCTW_2020-07-29, EC was at both transect 2 and 3, but I only want the dry/wet biomass for EC for the whole survey.

When I create the data frame, there are then “duplicate” observations for total species biomass for EC because it was present at both transects. To fix this, I filtered the data frame for unique observations of the total and percent biomass calculations and double checked the total dry percent to make sure everything added up to 1.

i. Calculations

test <- biomass %>% 
  filter(sample_ID %in% c("SCTW_2010-07-29", "SCTW_2016-07-20", "SCTW_2020-07-29")) %>% 
  select(sample_ID, site, year, date, sp_code, dry_gm2, wm_gm2) %>% 
  # create a new column where total biomass for both surveys is calculated
  group_by(site, year) %>% 
  mutate(total_dry = sum(dry_gm2),
         total_wet = sum(wm_gm2)) %>% 
  ungroup() %>% 
  # create a new column where total biomass for the species for the whole survey is calculated
  group_by(site, year, sp_code) %>% 
  mutate(total_sp_dry = sum(dry_gm2),
         total_sp_wet = sum(wm_gm2)) %>% 
  ungroup() %>% 
  # create a new column where percent of total biomass per survey for each species is calculated
  mutate(percent_sp_dry = total_sp_dry/total_dry,
         percent_sp_wet = total_sp_wet/total_wet) %>% 
  # double check to make sure the percents worked out...
  select(sample_ID, site, year, date, sp_code, 
         # values that need to be unique: 
         total_sp_dry, total_sp_wet, percent_sp_dry, percent_sp_wet) %>% 
  unique() %>% 
  group_by(sample_ID) %>% 
  mutate(total_percent = sum(percent_sp_dry),
         total_percent_wet = sum(percent_sp_wet)) %>% 
  # join with coarse traits data frame, which includes taxonomy
  left_join(., coarse_traits, by = "sp_code")

ii. Plots

# plot of proportion total biomass by phylum faceted by sampling date
ggplot(test) +
  aes(x = taxon_phylum, y = percent_sp_dry) +
  # calculating sum of proportion within stat_summary
  stat_summary(aes(fill = taxon_phylum),
               fun = "sum", geom = "bar") +
  # adding labels for sum values
  stat_summary(aes(label = round(..y.., 2)), 
               fun = "sum", geom = "text", 
               size = 4, vjust = -0.5) +
  scale_fill_manual(values = c("Chlorophyta" = chloro_col, 
                               "Ochrophyta" = ochro_col, 
                               "Rhodophyta" = rhodo_col)) +
  scale_y_continuous(limits = c(0, 1), expand = c(0, 0)) +
  facet_wrap(~sample_ID) +
  theme_classic() +
  theme(legend.position = "none") +
  labs(x = "Phylum",
       y = "Proportion total biomass",
       title = "Biomass by phylum for SCTW")

# pretend timeseries
ggplot(test) +
  aes(x = date, y = percent_sp_dry) +
  # calculating sum of proportion within stat_summary
  stat_summary(aes(col = taxon_phylum),
               fun = "sum", geom = "line", size = 1) +
  stat_summary(aes(col = taxon_phylum),
               fun = "sum", geom = "point", size = 2) +
  scale_color_manual(values = c("Chlorophyta" = chloro_col, 
                                "Ochrophyta" = ochro_col, 
                                "Rhodophyta" = rhodo_col)) +
  scale_y_continuous(limits = c(0, 1), expand = c(0, 0)) +
  theme_bw() +
  # theme(legend.position = "none") +
  labs(x = "Sampling date",
       y = "Proportion total biomass",
       title = "Biomass by phylum through time for SCTW",
       col = "Phylum")

c. Biomass data set

Since this worked, I felt comfortable doing this for the whole biomass data frame.

i. Calculations

prop_biomass <- biomass %>% 
  filter(sp_code != "MAPY") %>% 
  select(sample_ID, site, year, date, sp_code, dry_gm2, wm_gm2) %>% 
  # create a new column where total biomass for the whole survey is calculated
  group_by(site, year) %>% 
  mutate(total_dry = sum(dry_gm2),
         total_wet = sum(wm_gm2)) %>% 
  ungroup() %>% 
  # create a new column where total biomass for the species for the whole survey is calculated
  group_by(site, year, sp_code) %>% 
  mutate(total_sp_dry = sum(dry_gm2),
         total_sp_wet = sum(wm_gm2)) %>% 
  ungroup() %>% 
  # create a new column where percent of total biomass per survey for each species is calculated
  mutate(percent_sp_dry = total_sp_dry/total_dry,
         percent_sp_wet = total_sp_wet/total_wet) %>% 
  # replace NaNs with 0 (which happens when there's nothing in the survey)
  # mutate_at(vars("percent_sp_dry", "percent_sp_wet"), list(~ ifelse(. = NaN, 0, .))) %>% 
  # double check to make sure the percents worked out...
  select(sample_ID, site, year, date, sp_code, 
         # values that need to be unique: 
         total_sp_dry, total_sp_wet, percent_sp_dry, percent_sp_wet) %>% 
  unique() %>% 
  group_by(sample_ID) %>% 
  mutate(total_percent = sum(percent_sp_dry),
         total_percent_wet = sum(percent_sp_wet)) %>% 
  # join with coarse traits data frame, which includes taxonomy
  left_join(., coarse_traits, by = "sp_code") %>% 
  # include a column for whether or not the species is in the "interesting species" list
  mutate(algae_interest = case_when(
    sp_code %in% algae_interest ~ "species of interest",
    TRUE ~ "other species"
  ))

ii. Plots

Made some plot functions to keep things tidy.

phylum.plot <- function(site_choice) {
  # choose a subtitle from sites_full
  subtitle <- pluck(sites_full, site_choice)
  
  ggplot(prop_biomass %>% filter(site == {{ site_choice }})) +
  aes(x = date, y = percent_sp_dry) +
  # calculating sum of proportion within stat_summary
  stat_summary(aes(col = taxon_phylum),
               fun = "sum", geom = "line", size = 1) +
  stat_summary(aes(col = taxon_phylum),
               fun = "sum", geom = "point", size = 2) +
  scale_color_manual(values = c("Chlorophyta" = chloro_col, 
                                "Ochrophyta" = ochro_col, 
                                "Rhodophyta" = rhodo_col)) +
  scale_y_continuous(limits = c(0, 1), expand = c(0, 0)) +
  theme_bw() +
  # theme(legend.position = "none") +
  labs(x = "Sampling date",
       y = "Proportion total biomass",
       title = "Biomass by phylum through time",
       subtitle = subtitle,
       col = "Phylum")
}

growth.form.plot <- function(site_choice) {
  # choose a subtitle from sites_full
  subtitle <- pluck(sites_full, site_choice)
  
  ggplot(prop_biomass %>% filter(site == {{ site_choice }})) +
    aes(x = date, y = percent_sp_dry) +
    # calculating sum of proportion within stat_summary
    stat_summary(aes(col = growth_form),
                 fun = "sum", geom = "line", size = 1) +
    stat_summary(aes(col = growth_form),
                 fun = "sum", geom = "point", size = 2) +
    scale_y_continuous(limits = c(0, 1), expand = c(0, 0)) +
    theme_bw() +
    labs(x = "Sampling date",
         y = "Proportion total biomass",
         title = "Biomass by growth form through time",
         subtitle = subtitle,
         col = "Growth form")
}

spp.interest.plot <- function(site_choice) {
  # choose a subtitle from sites_full
  subtitle <- pluck(sites_full, site_choice)
  
  ggplot(prop_biomass %>% filter(site == {{ site_choice }})) +
    aes(x = date, y = percent_sp_dry) +
    # calculating sum of proportion within stat_summary
    stat_summary(aes(col = algae_interest),
                 fun = "sum", geom = "line", size = 1) +
    stat_summary(aes(col = algae_interest),
                 fun = "sum", geom = "point", size = 2) +
    scale_y_continuous(limits = c(0, 1), expand = c(0, 0)) +
    theme_bw() +
    labs(x = "Sampling date",
         y = "Proportion total biomass",
         title = "Species of interest",
         subtitle = subtitle, 
         col = " ")
}

species.plot <- function(site_choice) {
  # choose a title from sites_full
  title <- pluck(sites_full, site_choice)
  
  df <- prop_biomass %>% ungroup() %>% filter(site == {{ site_choice }})
  
  plot_ly(data = df, 
          x = ~date, y = ~percent_sp_dry, 
          color = ~scientific_name, colors = c(cal_palette(name = "tidepool", type = "continuous")), 
          mode = "markers+lines") %>% 
    add_trace(mode = "markers+lines", type = "scatter") %>% 
    layout(title = title, 
           xaxis = list(title = "Date", nticks = 8), 
           yaxis = list(title = "Proportion total biomass", range = c(-0.01, 1)),
           legend = list(title = list(text = '<b> Scientific name </b>')))
}
phylum.plot("bull")

phylum.plot("aque")

phylum.plot("ahnd")
## Warning: Removed 116 rows containing non-finite values (stat_summary).
## Removed 116 rows containing non-finite values (stat_summary).

phylum.plot("napl")

phylum.plot("ivee")

phylum.plot("golb")

phylum.plot("abur")

phylum.plot("mohk")

phylum.plot("carp")

phylum.plot("scdi")

phylum.plot("sctw")

growth.form.plot("bull")

growth.form.plot("aque")

growth.form.plot("ahnd")
## Warning: Removed 116 rows containing non-finite values (stat_summary).
## Removed 116 rows containing non-finite values (stat_summary).

growth.form.plot("napl")

growth.form.plot("ivee")

growth.form.plot("golb")

growth.form.plot("abur")

growth.form.plot("mohk")

growth.form.plot("carp")

growth.form.plot("scdi")

growth.form.plot("sctw")

spp.interest.plot("bull")

spp.interest.plot("aque")

spp.interest.plot("ahnd")
## Warning: Removed 116 rows containing non-finite values (stat_summary).
## Removed 116 rows containing non-finite values (stat_summary).

spp.interest.plot("napl")

spp.interest.plot("ivee")

spp.interest.plot("golb")

spp.interest.plot("abur")

spp.interest.plot("mohk")

spp.interest.plot("carp")

spp.interest.plot("scdi")

spp.interest.plot("sctw")

species.plot("bull")
species.plot("aque")
species.plot("ahnd")
species.plot("napl")
species.plot("ivee")
species.plot("golb")
species.plot("abur")
species.plot("mohk")
species.plot("carp")
species.plot("scdi")
species.plot("sctw")

2. Tables of species contribution to biomass

table_df <- biomass %>% 
  select(site, scientific_name, dry_gm2, wm_gm2) %>% 
  mutate(site = fct_relevel(site, "bull", "aque", "ahnd", "napl", "ivee", "golb", "abur",
                            "mohk", "carp", "scdi", "sctw")) %>% 
  # filter out MAPY
  filter(scientific_name != "Macrocystis pyrifera") %>% 
  # create a new column where total biomass for the whole survey is calculated
  group_by(site) %>% 
  mutate(total_dry = sum(dry_gm2),
         total_wet = sum(wm_gm2)) %>% 
  ungroup() %>% 
  # create a new column where total biomass for the species for the whole survey is calculated
  group_by(site, scientific_name) %>% 
  mutate(total_sp_dry = sum(dry_gm2),
         total_sp_wet = sum(wm_gm2)) %>% 
  ungroup() %>% 
  # create a new column where percent of total biomass per survey for each species is calculated
  mutate(percent_sp_dry = total_sp_dry/total_dry,
         percent_sp_wet = total_sp_wet/total_wet) %>% 
  # double check to make sure the percents worked out...
  select(site, scientific_name, 
         # values that need to be unique: 
         total_sp_dry, total_sp_wet, percent_sp_dry, percent_sp_wet) %>% 
  unique() %>% 
  group_by(site) %>% 
  mutate(total_percent = sum(percent_sp_dry),
         total_percent_wet = sum(percent_sp_wet)) %>% 
  select(scientific_name, site, percent_sp_dry) %>% 
  mutate(percent_sp_dry = round(percent_sp_dry, 4)) %>% 
  mutate(percent_whole_sp_dry = percent_sp_dry*100) %>% 
  select(-percent_sp_dry) %>% 
  mutate(site = str_to_upper(site)) %>% 
  pivot_wider(names_from = "site", values_from = "percent_whole_sp_dry") %>% 
  left_join(., coarse_traits, by = "scientific_name") %>% 
  slice(-2) %>% 
  select(scientific_name, sp_code, BULL, AQUE, AHND, NAPL, IVEE, GOLB, ABUR,
                            MOHK, CARP, SCDI, SCTW)

table <- table_df %>% 
  gt() %>% 
  data_color(columns = 3,
             colors = scales::col_numeric(palette = gradient_palette, 
                                          domain = c(max(table_df[, 3]), 0))) %>% 
  data_color(columns = 4,
             colors = scales::col_numeric(palette = gradient_palette, 
                                          domain = c(max(table_df[, 4]), 0))) %>% 
  data_color(columns = 5,
             colors = scales::col_numeric(palette = gradient_palette, 
                                          domain = c(max(table_df[, 5]), 0))) %>% 
  data_color(columns = 6,
             colors = scales::col_numeric(palette = gradient_palette, 
                                          domain = c(max(table_df[, 6]), 0))) %>% 
  data_color(columns = 7,
             colors = scales::col_numeric(palette = gradient_palette, 
                                          domain = c(max(table_df[, 7]), 0))) %>% 
  data_color(columns = 8,
             colors = scales::col_numeric(palette = gradient_palette, 
                                          domain = c(max(table_df[, 8]), 0))) %>% 
  data_color(columns = 9,
             colors = scales::col_numeric(palette = gradient_palette, 
                                          domain = c(max(table_df[, 9]), 0))) %>% 
  data_color(columns = 10,
             colors = scales::col_numeric(palette = gradient_palette, 
                                          domain = c(max(table_df[, 10]), 0))) %>% 
  data_color(columns = 11,
             colors = scales::col_numeric(palette = gradient_palette, 
                                          domain = c(max(table_df[, 11]), 0))) %>% 
  data_color(columns = 12,
             colors = scales::col_numeric(palette = gradient_palette, 
                                          domain = c(max(table_df[, 12]), 0))) %>% 
  data_color(columns = 13,
             colors = scales::col_numeric(palette = gradient_palette, 
                                          domain = c(max(table_df[, 13]), 0))) %>% 
  fmt_missing(columns = 3:13,
              missing_text = "--") %>% 
  tab_spanner(
    label = "Sites",
    columns = c(BULL, AQUE, AHND, NAPL, IVEE, GOLB, ABUR,
                MOHK, CARP, SCDI, SCTW)
  ) %>% 
  tab_spanner(
    label = "Species",
    columns = c("scientific_name", "sp_code")
  ) %>% 
  cols_label(
    scientific_name = "Scientific name",
    sp_code = "Species code",
  ) %>% 
  gtsave("sp_site_table-selection.png", path = here::here("tables"))